#!/bin/bash
#
# Usage: GetStutterModels hipstr_genotypes.vcf.gz
#           Extract the stutter models from a HipSTR-generated VCF and output it to stdout

# Entry point: with exactly one argument (a bgzipped HipSTR VCF), print one
# tab-separated stutter-model row per VCF record to stdout. With any other
# argument count, print the usage text.
# Diagnostics go to stderr so they can never be mistaken for model rows by a
# consumer reading stdout.
if [ "$#" -eq 1 ]; then
    # Make sure vcftools is installed
    hash vcftools 2>/dev/null || { echo >&2 "ERROR: This script requires vcftools. Please check that it's appropriately installed"; exit 1; }

    # Make sure the file exists. "$1" is quoted so that paths containing
    # whitespace or glob characters are tested as a single word.
    if [[ ! -e "$1" ]]; then
        printf 'ERROR: VCF file %s does not exist\n' "$1" >&2
        exit 1
    fi

    # Make sure the file ends in .gz (vcftools is invoked with --gzvcf below)
    if [[ "$1" != *.gz ]]; then
        printf '%s\n' "ERROR: The VCF file must be bgzipped (and end in .gz)" >&2
        exit 1
    fi

    # Without pipefail the pipeline's status is awk's, so a vcftools failure
    # would be reported as success with empty output.
    set -o pipefail

    # Pipeline stages:
    #   vcftools : dump the requested INFO fields for every record
    #   tail     : drop the first (header) line
    #   cut      : keep column 1 and columns 5 onward
    #              (assumes vcftools emits CHROM, POS, REF, ALT first -- confirm
    #              against the vcftools --get-INFO documentation)
    #   awk      : re-emit the 9 remaining columns tab-separated, raising
    #              columns 5, 6, 8 and 9 (the *_DOWN / *_UP values, given the
    #              --get-INFO order above) to a minimum of 0.01
    vcftools --gzvcf "$1" --stdout \
        --get-INFO START --get-INFO END \
        --get-INFO INFRAME_PGEOM --get-INFO INFRAME_DOWN --get-INFO INFRAME_UP \
        --get-INFO OUTFRAME_PGEOM --get-INFO OUTFRAME_DOWN --get-INFO OUTFRAME_UP \
        | tail -n +2 \
        | cut -f 1,5- \
        | awk -v OFS="\t" '{print $1, $2, $3, $4, ($5 > 0.01 ? $5 : 0.01), ($6 > 0.01 ? $6 : 0.01), $7, ($8 > 0.01 ? $8 : 0.01), ($9 > 0.01 ? $9 : 0.01)}'
else
    cat <<usage
Usage: GetStutterModels hipstr_genotypes.vcf.gz
           Extract the stutter models from a HipSTR-generated VCF and output it to stdout
usage
fi
